%% Assignment 3; Precipitation
% 3.2	Rainfall undercatch
clear all;close all;

%% Define dataset and variables
load data;

%% Assignment 3.2.1
% 1.	What do you expect of the correlation for the periods:
% a.	Before 15th July 2002 (both stations are unshielded?
% Due to the which had more influence on the amount of rain caught by the
% station. More wind results in a lower amount of rain caught by the meter.
% This can be seen in excercise 3.1. But since both are unshielded, both
% meter should catch the same fraction of rainfall and therefor should have
% large correlation.

% b.	Between 15th July 2002 and 25th September 2008 (only Cabauw is shielded)?
% Since only the Cabauw was shielded in that period, Cabauw caught a higher
% fraction of the rain compared to station Bilt. The correlation is lower
% compared to the other periods.

% c.	After 25th September 2008 (both stations are shielded)?
% Both stations are shielded, both stations should catch the same fraction
% of the fallen rain (many assumptions). Only geographical differences,
% having influence on the actual rainfall can be considered small, since
% they lay only 30km apart. A large correlation can be expected.

%% Assignment 3.2.2 Period 1
% Period before 15th July 2002 (both stations are unshielded)
CabPer1         = data.Cabauw.etmaalsom(data.Cabauw.date<20020715);
BilPer1         = data.Bilt.etmaalsom(data.Bilt.date<20020715);
meanCabPer1     = mean(CabPer1);
meanBilPer1     = mean(BilPer1);

sCabPer1        = (CabPer1-meanCabPer1).^2;
sBilPer1        = (BilPer1-meanBilPer1).^2;
sCabBilPer1     = (CabPer1-meanCabPer1).* (BilPer1-meanBilPer1);

sumsCabPer1     = sum(sCabPer1);
sumsBilPer1     = sum(sBilPer1);
sumsCabBilPer1  = sum(sCabBilPer1);

n_1CabPer1      = length(sCabPer1)-1;
n_1BilPer1      = length(sBilPer1)-1;
n_1CabBilPer1   = length(sCabBilPer1)-1;

sCabPer1_2      = sumsCabPer1/n_1CabPer1;
sBilPer1_2      = sumsBilPer1/n_1BilPer1;
sCabBilPer1_2   = sumsCabBilPer1/n_1CabBilPer1;

sqrtCabPer1     = sqrt(sCabPer1_2);
sqrtBilPer1     = sqrt(sBilPer1_2);

% trend equation y = a*x+b
a               = sCabPer1_2 / sCabBilPer1_2;
b               = sBilPer1_2 / sCabBilPer1_2;

r               = sCabBilPer1_2 / (sqrt(sCabPer1_2)*sqrt(sBilPer1_2));
r_2             = r^2;

xMax            = max([CabPer1; BilPer1]);

x               = linspace(0,xMax,100);
y               = a * x + b;

fprintf('equation for the trendline is:\ny = %f * x + %f \nr^2 equals %f\n',a,b,r_2);

%   Plot correlation
figure(1);
cax = gca;
set(cax,'NextPlot','add');
xlim([-10 xMax+10]); ylim([-10 xMax+10]);
axis equal;
scat=scatter(cax,CabPer1, BilPer1);
plot(cax,x,y);

%% Assignment 3.2.2 Period 2
% Period between 15th July 2002 and 25th September 2008 (only Cabauw is shielded)
CabPer2         = data.Cabauw.etmaalsom(data.Cabauw.date>=20020715 & data.Cabauw.date<=20080925);
BilPer2         = data.Bilt.etmaalsom(data.Bilt.date>=20020715 & data.Bilt.date<=20080925);
meanCabPer2     = mean(CabPer2);
meanBilPer2     = mean(BilPer2);

sCabPer2        = (CabPer2-meanCabPer2).^2;
sBilPer2        = (BilPer2-meanBilPer2).^2;
sCabBilPer2     = (CabPer2-meanCabPer2).* (BilPer2-meanBilPer2);

sumsCabPer2     = sum(sCabPer2);
sumsBilPer2     = sum(sBilPer2);
sumsCabBilPer2  = sum(sCabBilPer2);

n_1CabPer2      = length(sCabPer2)-1;
n_1BilPer2      = length(sBilPer2)-1;
n_1CabBilPer2   = length(sCabBilPer2)-1;

sCabPer2_2      = sumsCabPer2/n_1CabPer2;
sBilPer2_2      = sumsBilPer2/n_1BilPer2;
sCabBilPer2_2   = sumsCabBilPer2/n_1CabBilPer2;

sqrtCabPer2     = sqrt(sCabPer2_2);
sqrtBilPer2     = sqrt(sBilPer2_2);

% trend equation y = a*x+b
a               = sCabPer2_2 / sCabBilPer2_2;
b               = sBilPer2_2 / sCabBilPer2_2;

r               = sCabBilPer2_2 / (sqrt(sCabPer2_2)*sqrt(sBilPer2_2));
r_2             = r^2;

xMax            = max([CabPer2; BilPer2]);

x               = linspace(0,xMax,100);
y               = a * x + b;

fprintf('equation for the trendline is:\ny = %f * x + %f \nr^2 equals %f\n',a,b,r_2);

%   Plot correlation
figure(2);
cax = gca;
set(cax,'NextPlot','add');
xlim([0 xMax]); ylim([0 xMax]);
axis equal;
scatter(cax,CabPer2, BilPer2);
plot(cax,x,y);

%% Assignment 3.2.2 Period 1
% After 25th September 2008 (both stations are shielded)
CabPer3         = data.Cabauw.etmaalsom(data.Cabauw.date>20080925);
BilPer3         = data.Bilt.etmaalsom(data.Bilt.date>20080925);
meanCabPer3     = mean(CabPer3);
meanBilPer3     = mean(BilPer3);

sCabPer3        = (CabPer3-meanCabPer3).^2;
sBilPer3        = (BilPer3-meanBilPer3).^2;
sCabBilPer3     = (CabPer3-meanCabPer3).* (BilPer3-meanBilPer3);

sumsCabPer3     = sum(sCabPer3);
sumsBilPer3     = sum(sBilPer3);
sumsCabBilPer3  = sum(sCabBilPer3);

n_1CabPer3      = length(sCabPer3)-1;
n_1BilPer3      = length(sBilPer3)-1;
n_1CabBilPer3   = length(sCabBilPer3)-1;

sCabPer3_2      = sumsCabPer3/n_1CabPer3;
sBilPer3_2      = sumsBilPer3/n_1BilPer3;
sCabBilPer3_2   = sumsCabBilPer3/n_1CabBilPer3;

sqrtCabPer3     = sqrt(sCabPer3_2);
sqrtBilPer3     = sqrt(sBilPer3_2);

% trend equation y = a*x+b
a               = sCabPer3_2 / sCabBilPer3_2;
b               = sBilPer3_2 / sCabBilPer3_2;

r               = sCabBilPer3_2 / (sqrt(sCabPer3_2)*sqrt(sBilPer3_2));
r_2             = r^2;

xMax            = max([CabPer3; BilPer3]);

x               = linspace(0,xMax,100);
y               = a * x + b;

fprintf('equation for the trendline is:\ny = %f * x + %f \nr^2 equals %f\n',a,b,r_2);

%   Plot correlation
figure(3);
cax = gca;
set(cax,'NextPlot','add');
xlim([0 xMax]); ylim([0 xMax]);
axis equal;
scatter(cax,CabPer3, BilPer3);
plot(cax,x,y);

%% Assignment 3.2.2 Analyses
% As expected, the second period has the lowest correlation. And the
% highest correlation can be found in the last period when both meters are
% shielded.